# Fix the RNG seed so the placeholder income values drawn below are
# reproducible, then load the candidate demographics.
set.seed(94305)
unfilled_cands <- read.csv("cand_demos.csv")

# This dataset is non-anonymized and income is based on L2's values, so
# income is removed from this replication file and replaced with a random
# vector (one standard-normal draw per row).
# All other variables are based on public information.
unfilled_cands[["CommercialData_EstimatedHHIncomeAmount"]] <-
  rnorm(n = nrow(unfilled_cands))
  

# Yearly averages of the candidate demographics, in long format for plotting.
# The nonwhite and female means are multiplied by 100 and the income mean is
# divided by 5000 (labelled "Income (5k)" below); age is left as is.
# The result has one row per year and variable, with the value in `val`.
ufc <- unfilled_cands %>%
  group_by(year) %>%
  summarise(
    nonwhite = mean(nonwhite, na.rm = TRUE) * 100,
    age = mean(age, na.rm = TRUE),
    female = mean(female, na.rm = TRUE) * 100,
    CommercialData_EstimatedHHIncomeAmount =
      mean(CommercialData_EstimatedHHIncomeAmount, na.rm = TRUE) / 5000
  ) %>%
  pivot_longer(!year, names_to = "Variable", values_to = "val")

# Relabel by exact column name. A lookup is used instead of sequential
# substring replacement so one label can never be rewritten by a later pattern.
ufc$Variable <- unname(
  c(
    nonwhite = "% Nonwhite",
    age = "Age",
    female = "% Female",
    CommercialData_EstimatedHHIncomeAmount = "Income (5k)"
  )[ufc$Variable]
)


# figure 3
# Keep only years after 2009, then draw one line (with point markers) per
# variable. The x axis is limited to 2011-2021 and the y axis to 20-60; a
# dashed vertical reference line is drawn at 2017.
ufc2 <- subset(ufc, year > 2009)
ggplot(ufc2, aes(x = year, y = val, color = Variable, shape = Variable)) +
  theme_light() +
  geom_line() +
  geom_point() +
  geom_vline(xintercept = 2017, linetype = "dashed") +
  ylim(20, 60) +
  theme(legend.position = "bottom") +
  scale_color_brewer(palette = "Set1") +
  labs(x = "Year", y = " ", title = "") +
  scale_x_continuous(
    limits = c(2011, 2021),
    breaks = seq(2011, 2021, by = 2)
  )

  
# Build the voucher vs. non-voucher comparison table.
#
# For income, female, nonwhite and age, runs a two-sample t.test() (default
# settings) comparing rows with Got.voucher == 1 against rows with
# Got.voucher == 0: once on all rows of `unfilled_cands`, and once on the rows
# with general == 1.
#
# Args:
#   unfilled_cands: data frame with columns Got.voucher, general, nonwhite,
#     age, female and CommercialData_EstimatedHHIncomeAmount.
#
# Returns:
#   A 6 x 7 data frame with columns X1..X7, all stored as character:
#     row 1     column headers
#     rows 2-5  income, female, nonwhite, age (label in X1)
#     row 6     left empty (NA), with " " in X1
#     X2-X4     all candidates: non-voucher mean, voucher mean, standard error
#     X5-X7     same three statistics for candidates with general == 1
#   Numbers are rounded to 2 decimals before being converted to character.
mat_ftn <- function(unfilled_cands) {
  # Variables in the order they appear in rows 2 to 5 of the table.
  row_vars <- c(
    "CommercialData_EstimatedHHIncomeAmount", "female", "nonwhite", "age"
  )

  # Runs each t-test exactly once and returns a 4 x 3 numeric matrix with
  # columns: non-voucher mean, voucher mean, standard error.
  compare_groups <- function(cands) {
    has_voucher <- cands$Got.voucher == 1
    no_voucher <- cands$Got.voucher == 0
    per_var <- vapply(
      row_vars,
      function(v) {
        tt <- t.test(cands[[v]][has_voucher], cands[[v]][no_voucher])
        # estimate[1] is the mean of the first sample (voucher),
        # estimate[2] the mean of the second (non-voucher).
        c(tt$estimate[[2]], tt$estimate[[1]], tt$stderr)
      },
      numeric(3)
    )
    unname(t(per_var))
  }

  mat_a <- data.frame(matrix(nrow = 6, ncol = 7))

  # all candidates
  mat_a[2:5, 2:4] <- compare_groups(unfilled_cands)

  # subset to candidates who make it to the general
  viable <- subset(unfilled_cands, unfilled_cands$general == 1)
  mat_a[2:5, 5:7] <- compare_groups(viable)

  # Round while the cells are still numeric; writing the header row afterwards
  # converts every column to character.
  mat_a <- round(mat_a, 2)
  mat_a[1, ] <- c(
    " ", "non-voucher cand", "voucher cand", "se",
    "non-voucher cand", "voucher cand", "se"
  )
  mat_a$X1 <- c(" ", "income", "female", "nonwhite", "age", " ")
  mat_a
}

# Comparison table on the full sample.
mat_all <- mat_ftn(unfilled_cands)

# Same table restricted to 2015 onward. Note that `unfilled_cands` itself is
# overwritten with the restricted sample from here on.
unfilled_cands <- subset(unfilled_cands, year >= 2015)
mat15 <- mat_ftn(unfilled_cands)

# Promote the header row (row 1) to column names, then print rows 2 to 5.
names(mat_all) <- mat_all[1, ]
names(mat15) <- mat15[1, ]
print(xtable(mat_all[2:5, ]), include.rownames = FALSE) # table a4
print(xtable(mat15[2:5, ]), include.rownames = FALSE) # table a5

